{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sklearn.decomposition import PCA\n",
    "import scipy.io as sio\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn import preprocessing\n",
    "import os\n",
    "import random\n",
    "from random import shuffle\n",
    "from skimage.transform import rotate\n",
    "import scipy.ndimage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def loadIndianPinesData():\n",
    "    data_path = os.path.join(os.getcwd(),'Data')\n",
    "    data = sio.loadmat(os.path.join(data_path, 'Indian_pines_corrected.mat'))['indian_pines_corrected']\n",
    "    labels = sio.loadmat(os.path.join(data_path, 'Indian_pines_gt.mat'))['indian_pines_gt']\n",
    "    \n",
    "    return data, labels\n",
    "\n",
    "def splitTrainTestSet(X, y, testRatio=0.10):\n",
    "    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=testRatio, random_state=345,\n",
    "                                                        stratify=y)\n",
    "    return X_train, X_test, y_train, y_test\n",
    "\n",
    "def oversampleWeakClasses(X, y):\n",
    "    uniqueLabels, labelCounts = np.unique(y, return_counts=True)\n",
    "    maxCount = np.max(labelCounts)\n",
    "    labelInverseRatios = maxCount / labelCounts  \n",
    "    # repeat for every label and concat\n",
    "    newX = X[y == uniqueLabels[0], :, :, :].repeat(round(labelInverseRatios[0]), axis=0)\n",
    "    newY = y[y == uniqueLabels[0]].repeat(round(labelInverseRatios[0]), axis=0)\n",
    "    for label, labelInverseRatio in zip(uniqueLabels[1:], labelInverseRatios[1:]):\n",
    "        cX = X[y== label,:,:,:].repeat(round(labelInverseRatio), axis=0)\n",
    "        cY = y[y == label].repeat(round(labelInverseRatio), axis=0)\n",
    "        newX = np.concatenate((newX, cX))\n",
    "        newY = np.concatenate((newY, cY))\n",
    "    np.random.seed(seed=42)\n",
    "    rand_perm = np.random.permutation(newY.shape[0])\n",
    "    newX = newX[rand_perm, :, :, :]\n",
    "    newY = newY[rand_perm]\n",
    "    return newX, newY\n",
    "\n",
    "def standartizeData(X):\n",
    "    newX = np.reshape(X, (-1, X.shape[2]))\n",
    "    scaler = preprocessing.StandardScaler().fit(newX)  \n",
    "    newX = scaler.transform(newX)\n",
    "    newX = np.reshape(newX, (X.shape[0],X.shape[1],X.shape[2]))\n",
    "    return newX, scaler\n",
    "\n",
    "def applyPCA(X, numComponents=75):\n",
    "    newX = np.reshape(X, (-1, X.shape[2]))\n",
    "    pca = PCA(n_components=numComponents, whiten=True)\n",
    "    newX = pca.fit_transform(newX)\n",
    "    newX = np.reshape(newX, (X.shape[0],X.shape[1], numComponents))\n",
    "    return newX, pca\n",
    "\n",
    "def padWithZeros(X, margin=2):\n",
    "    newX = np.zeros((X.shape[0] + 2 * margin, X.shape[1] + 2* margin, X.shape[2]))\n",
    "    x_offset = margin\n",
    "    y_offset = margin\n",
    "    newX[x_offset:X.shape[0] + x_offset, y_offset:X.shape[1] + y_offset, :] = X\n",
    "    return newX\n",
    "\n",
    "def createPatches(X, y, windowSize=5, removeZeroLabels = True):\n",
    "    margin = int((windowSize - 1) / 2)\n",
    "    zeroPaddedX = padWithZeros(X, margin=margin)\n",
    "    # split patches\n",
    "    patchesData = np.zeros((X.shape[0] * X.shape[1], windowSize, windowSize, X.shape[2]))\n",
    "    patchesLabels = np.zeros((X.shape[0] * X.shape[1]))\n",
    "    patchIndex = 0\n",
    "    for r in range(margin, zeroPaddedX.shape[0] - margin):\n",
    "        for c in range(margin, zeroPaddedX.shape[1] - margin):\n",
    "            patch = zeroPaddedX[r - margin:r + margin + 1, c - margin:c + margin + 1]   \n",
    "            patchesData[patchIndex, :, :, :] = patch\n",
    "            patchesLabels[patchIndex] = y[r-margin, c-margin]\n",
    "            patchIndex = patchIndex + 1\n",
    "    if removeZeroLabels:\n",
    "        patchesData = patchesData[patchesLabels>0,:,:,:]\n",
    "        patchesLabels = patchesLabels[patchesLabels>0]\n",
    "        patchesLabels -= 1\n",
    "    return patchesData, patchesLabels\n",
    "\n",
    "\n",
    "def AugmentData(X_train):\n",
    "    for i in range(int(X_train.shape[0]/2)):\n",
    "        patch = X_train[i,:,:,:]\n",
    "        num = random.randint(0,2)\n",
    "        if (num == 0):\n",
    "            \n",
    "            flipped_patch = np.flipud(patch)\n",
    "        if (num == 1):\n",
    "            \n",
    "            flipped_patch = np.fliplr(patch)\n",
    "        if (num == 2):\n",
    "            \n",
    "            no = random.randrange(-180,180,30)\n",
    "            flipped_patch = scipy.ndimage.interpolation.rotate(patch, no,axes=(1, 0),\n",
    "                                                               reshape=False, output=None, order=3, mode='constant', cval=0.0, prefilter=False)\n",
    "    \n",
    "    \n",
    "    patch2 = flipped_patch\n",
    "    X_train[i,:,:,:] = patch2\n",
    "    \n",
    "    return X_train\n",
    "\n",
    "\n",
    "def savePreprocessedData(X_trainPatches, X_testPatches, y_trainPatches, y_testPatches, windowSize, wasPCAapplied = False, numPCAComponents = 0, testRatio = 0.25):\n",
    "    if wasPCAapplied:\n",
    "        with open(\"X_trainPatches_\" + str(windowSize) + \"PCA\" + str(numPCAComponents) + \"testRatio\" + str(testRatio) + \".npy\", 'bw') as outfile:\n",
    "            np.save(outfile, X_trainPatches)\n",
    "        with open(\"X_testPatches_\" + str(windowSize) + \"PCA\" + str(numPCAComponents) + \"testRatio\" + str(testRatio) + \".npy\", 'bw') as outfile:\n",
    "            np.save(outfile, X_testPatches)\n",
    "        with open(\"y_trainPatches_\" + str(windowSize) + \"PCA\" + str(numPCAComponents) + \"testRatio\" + str(testRatio) + \".npy\", 'bw') as outfile:\n",
    "            np.save(outfile, y_trainPatches)\n",
    "        with open(\"y_testPatches_\" + str(windowSize) + \"PCA\" + str(numPCAComponents) + \"testRatio\" + str(testRatio) + \".npy\", 'bw') as outfile:\n",
    "            np.save(outfile, y_testPatches)\n",
    "    else:\n",
    "        with open(\"../preprocessedData/XtrainWindowSize\" + str(windowSize) + \".npy\", 'bw') as outfile:\n",
    "            np.save(outfile, X_trainPatches)\n",
    "        with open(\"../preprocessedData/XtestWindowSize\" + str(windowSize) + \".npy\", 'bw') as outfile:\n",
    "            np.save(outfile, X_testPatches)\n",
    "        with open(\"../preprocessedData/ytrainWindowSize\" + str(windowSize) + \".npy\", 'bw') as outfile:\n",
    "            np.save(outfile, y_trainPatches)\n",
    "        with open(\"../preprocessedData/ytestWindowSize\" + str(windowSize) + \".npy\", 'bw') as outfile:\n",
    "            np.save(outfile, y_testPatches)\n",
    "            \n",
    "            \n",
    "            \n",
    "            "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the Global values (windowSize, numPCAcomponents, testRatio) from the text file global_variables.txt\n",
    "myFile = open('global_variables.txt', 'r') \n",
    "file = myFile.readlines()[:]\n",
    "\n",
    "\n",
    "for line in file:\n",
    "\n",
    "    if line[0:3] == \"win\":\n",
    "\n",
    "        ds = line.find('=')\n",
    "        windowSize = int(line[ds+1:-1],10)\n",
    "\n",
    "    elif line[0:3] == \"num\":\n",
    "\n",
    "        ds = line.find('=')\n",
    "        numPCAcomponents = int(line[ds+2:-1],10)\n",
    "\n",
    "    else:\n",
    "\n",
    "        ds = line.find('=')\n",
    "        testRatio = float(line[ds+1:])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Global Variables\n",
    "#numPCAComponents = 30\n",
    "#windowSize = 5\n",
    "#testRatio = 0.25"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "X, y = loadIndianPinesData()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "X,pca = applyPCA(X,numPCAcomponents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "XPatches, yPatches = createPatches(X, y, windowSize=windowSize)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = splitTrainTestSet(XPatches, yPatches, testRatio)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, y_train = oversampleWeakClasses(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train = AugmentData(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "savePreprocessedData(X_train, X_test, y_train, y_test, windowSize = windowSize, \n",
    "                     wasPCAapplied=True, numPCAComponents = numPCAcomponents,testRatio = testRatio)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
